# Tag shared by the input file names; built from the cell type name and the
# graph weight, both of which are defined before this chunk.
file_tag = sprintf("%s_BL_%s", cell_type_name, graph_weight)

# Assayed genes: the file is read with one entry per line.
gene_list_path = sprintf("output/gene_list_%s.txt", file_tag)
assayed_genes = scan(gene_list_path, what = character(), sep = "\n")

# Gene sets: one set per line, members separated by commas.
gene_set_path = sprintf("output/name_s_%s.txt", file_tag)
gene_set_lines = scan(gene_set_path, what = character(), sep = "\n")

# strsplit() is already vectorised over lines and always returns an unnamed
# list of character vectors, and lengths() always returns an integer vector.
# The previous sapply() calls gave the same result for non-empty input but
# returned list() when the file was empty.
gene_sets = strsplit(gene_set_lines, split = ",")
n_genes = lengths(gene_sets)
summary(n_genes)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.00 23.00 24.00 23.73 25.00 27.00
## [1] 40
## [1] 3 20 21 21 22 22 22 23 23 23 23 24 24 24 24 24 24 24 24 24 24 25 25 25 25
## [26] 25 25 25 25 25 25 25 26 26 26 26 26 27 27 27
bioMart. All the gene symbols that can be found in bioMart are
consistent with the ones we have, so there is no need to run this step.
# Ask Ensembl (through biomaRt) for the HGNC symbol that goes with each
# assayed gene name.
ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
bm_attributes = c("hgnc_symbol", "external_gene_name")
gene_BM = getBM(
  attributes = bm_attributes,
  filters = "external_gene_name",
  values = assayed_genes,
  mart = ensembl
)

# Size of the query versus size of the returned table, plus a quick peek.
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2, ]

# How many assayed genes were found in the returned table?
table(is.element(assayed_genes, gene_BM$external_gene_name))

# Gene names that appear in more than one returned row.
t1 = table(gene_BM$external_gene_name)
is_repeated = t1 > 1
dup = names(t1)[is_repeated]
gene_BM[is.element(gene_BM$external_gene_name, dup), ]

# Rows where the HGNC symbol and the external gene name disagree.
table(with(gene_BM, hgnc_symbol == external_gene_name))
w2kp = with(gene_BM, which(hgnc_symbol != external_gene_name))
gene_BM[w2kp,]

# Map each assayed gene name to a gene symbol with the alias2Symbol function
# from limma. The surrounding prose had been fused into the code here, which
# broke the line above and bound the result vector to `limma.a2s`, not `a2s`.
a2s = rep(NA_character_, length(assayed_genes))
for(i in seq_along(assayed_genes)){
  gi = assayed_genes[i]
  ai = alias2Symbol(gi)
  # Report names that map to more than one symbol so they can be reviewed.
  if(length(ai) > 1){
    print(gi)
    print(ai)
  }
  # ai[1] is NA when alias2Symbol() returns nothing for this name.
  # NOTE(review): for ambiguous names the first hit is kept, which is not
  # necessarily the right one (the output below shows SEPT2 mapped to
  # SEPTIN6, the same symbol as SEPT6) -- consider curating these by hand.
  a2s[i] = ai[1]
}
## [1] "QARS"
## [1] "EPRS1" "QARS1"
## [1] "SEPT2"
## [1] "SEPTIN6" "SEPTIN2"
##
## FALSE TRUE
## 1607 42
##
## FALSE TRUE <NA>
## 42 1565 42
# Pair every assayed gene name with the symbol returned by limma.
gene_info = data.table(
  sym_in_data = assayed_genes,
  sym_limma = a2s
)
# Show the genes whose limma symbol differs from the name used in the data.
gene_info[sym_in_data != sym_limma]
## sym_in_data sym_limma
## 1: C10orf91 LINC02870
## 2: C12orf10 MYG1
## 3: C12orf45 NOPCHAP1
## 4: C6orf48 SNHG32
## 5: C6orf99 LINC02901
## 6: CXorf40A EOLA1
## 7: CXorf57 RADX
## 8: FAM102A EEIG1
## 9: FAM173A ANTKMT
## 10: FAM213B PRXL2B
## 11: H2AFX H2AX
## 12: HIST1H2AG H2AC11
## 13: HIST1H2BK H2BC12
## 14: HIST1H2BN H2BC15
## 15: HIST1H3A H3C1
## 16: HIST1H3H H3C10
## 17: HIST1H4C H4C3
## 18: HIST2H2BF H2BC18
## 19: KIAA0391 PRORP
## 20: QARS EPRS1
## 21: SEPT6 SEPTIN6
## 22: ARNTL BMAL1
## 23: C12orf65 MTRFR
## 24: C16orf72 HAPSTR1
## 25: CCDC84 CENATAC
## 26: DOPEY2 DOP1B
## 27: FAM126B HYCC2
## 28: FAM160B1 FHIP2A
## 29: H1FX H1-10
## 30: H2AFJ H2AJ
## 31: HEXDC HEXD
## 32: HIST1H1C H1-2
## 33: HIST1H1D H1-3
## 34: HIST1H1E H1-4
## 35: KIAA1109 BLTP1
## 36: KIAA1551 RESF1
## 37: MKL1 MRTFA
## 38: NARFL CIAO3
## 39: SEPT2 SEPTIN6
## 40: TARSL2 TARS3
## 41: TMEM8A PGAP6
## 42: WDR60 DYNC2I1
## sym_in_data sym_limma
# Start from the name used in the data ...
gene_info[, gene_symbol := sym_in_data]
# ... and switch to the limma symbol wherever limma returned a different one.
# which() drops the rows where sym_limma is NA, so those keep the data name.
renamed_rows = gene_info[, which(sym_in_data != sym_limma)]
gene_info[renamed_rows, gene_symbol := sym_limma]
dim(gene_info)
## [1] 1649 3
## sym_in_data sym_limma gene_symbol
## 1: ABLIM1 ABLIM1 ABLIM1
## 2: AC004687.1 <NA> AC004687.1
## 3: AC004854.2 <NA> AC004854.2
## 4: AC007384.1 <NA> AC007384.1
## 5: AC007952.4 <NA> AC007952.4
## t1
## 1 2
## 1647 1
## sym_in_data sym_limma gene_symbol
## 1: SEPT6 SEPTIN6 SEPTIN6
## 2: SEPT2 SEPTIN6 SEPTIN6
Gene set annotations (by gene symbols) were downloaded from the MSigDB website.
# GMT file (gene symbols) for each annotation collection.
gmtfile = list(
  reactome = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt",
  go_bp = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt",
  immune = "../Annotation/c7.all.v2023.2.Hs.symbols.gmt"
)

# Read every collection; the result is a list with the same names as gmtfile.
pathways = lapply(gmtfile, gmtPathways)
names(pathways)
## [1] "reactome" "go_bp" "immune"
## reactome go_bp immune
## 1692 7647 5219
Filter gene sets for size between 10 and 500.
## $reactome
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5.0 7.0 9.0 12.0 17.0 23.0 31.0 44.0 71.8 120.9 1463.0
##
## $go_bp
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5.0 6.0 8.0 10.0 14.0 19.0 29.0 46.0 80.8 183.0 1966.0
##
## $immune
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5 162 193 197 199 199 200 200 200 200 1992
## [1] 1649 3
# Over-representation analysis (goseq) of each gene set against every
# annotation collection in `pathways`. Significant categories are stored in
# goseq_res[["set_<k>"]][[<collection>]].
max_n2kp = 10       # keep at most this many categories per collection
min_set_size = 10   # gene sets with fewer genes than this are skipped
fdr_cutoff = 0.1    # categories are kept when their BH-adjusted p-value is below this

# The gene2cat argument depends only on the collection, not on the gene set,
# so build it once per collection instead of once per (gene set, collection).
gene2cat_list = lapply(pathways, goseq:::reversemapping)

# Stays NULL when nothing is significant; otherwise becomes a nested list.
goseq_res = NULL
# seq_along() avoids the 1:0 iteration that 1:length() gives on empty input.
for(k in seq_along(gene_sets)){
  if(length(gene_sets[[k]]) < min_set_size) { next }
  print(k)
  set_k = paste0("set_", k)
  print(gene_sets[[k]])

  # Logical indicator over all assayed genes: TRUE for members of set k.
  # Membership is matched on the names used in the data; the vector is named
  # by the (possibly updated) gene symbol that is passed on to goseq.
  genes = gene_info$sym_in_data %in% gene_sets[[k]]
  names(genes) = gene_info$gene_symbol
  pwf = nullp(genes, "hg38", "geneSymbol")

  for(k1 in names(pathways)){
    res1 = goseq(pwf, "hg38", "geneSymbol",
                 gene2cat = gene2cat_list[[k1]])
    res1$FDR = p.adjust(res1$over_represented_pvalue, method="BH")
    # na.rm keeps the comparison below well defined if any FDR is NA.
    nD = sum(res1$FDR < fdr_cutoff, na.rm = TRUE)
    if(nD > 0){
      # Most significant categories first, capped at max_n2kp rows.
      res1 = head(res1[order(res1$FDR),], min(nD, max_n2kp))
      # Tidy the category labels for printing: drop the collection prefix,
      # replace underscores with spaces, lower-case, truncate to 81 chars.
      res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
      res1$category = gsub("_", " ", res1$category)
      res1$category = tolower(res1$category)
      res1$category = substr(res1$category, start=1, stop=81)
      goseq_res[[set_k]][[k1]] = res1
    }
  }
}
## [1] 1
## [1] "AC008555.5" "AC012645.3" "AC044849.1" "AC087623.3" "AC119396.1"
## [6] "AC245297.3" "ARRDC2" "CCL4L2" "CLDND1" "CRLF3"
## [11] "EOMES" "HIKESHI" "INTS6L" "LINC00649" "PITPNC1"
## [16] "TRAV8-3" "TRGV7" "TRGV8" "ADGRG1" "ARHGAP30"
## [21] "CYTOR" "FCRL6" "NECAP1" "TTC38" "XCL2"
## [1] 2
## [1] "AK5" "CHRM3-AS2" "COQ8A" "IGLV1-44" "LINC00402"
## [6] "LST1" "MATR3-1" "PDCD4-AS1" "RETREG1" "TC2N"
## [11] "TRAV14DV4" "TRAV8-2" "TRBV28" "TRBV9" "CLEC16A"
## [16] "DOCK10" "HIPK1" "HRH2" "IFI27" "NUTM2B-AS1"
## [21] "PIK3CD" "POLR2J3-1" "PPP4R3B" "PUM3" "S100A12"
## [26] "TUT4"
## [1] 3
## [1] "NUAK2" "TBCCD1" "ABCA7" "ARHGAP10" "BTBD9" "DMTF1"
## [7] "ERBIN" "FAM126B" "FAM78A" "GK5" "HECA" "IRF9"
## [13] "MCTP2" "MYBL1" "MYO1F" "NFATC3" "NRDC" "OGA"
## [19] "PARP15" "PATL2" "PIK3R5" "RASGRP1" "RNF125" "SIDT1"
## [25] "TUT7" "XIST"
## [1] 4
## [1] "CCR7" "FXYD7" "LTB" "SLC2A3" "AC016831.7"
## [6] "ARHGAP45" "ARHGEF9" "ATAD2B" "BMT2" "ETFDH"
## [11] "FAM133B" "GABPB2" "GPHN" "HPS4" "KAT6B"
## [16] "LINC02446" "MARF1" "OXNAD1" "PCED1B" "PNPLA8"
## [21] "PSMA3-AS1" "RIPOR2" "SEC14L1" "THAP5" "TMEM131L"
## [26] "ZNF83"
## [1] 6
## [1] "HLA-DMB" "KLRB1" "KLRC3" "KLRK1" "MAN2B1" "OXA1L"
## [7] "TFB2M" "TIGIT" "CMKLR1" "CSNK1G2" "GALNT10" "GDPD5"
## [13] "ITGAM" "KIR2DL3" "RAPGEF1" "SLC20A1" "SLC38A10" "TIMP1"
## [19] "TMEM127" "TMEM181" "TMEM8A" "TTC17"
## [1] 7
## [1] "CCNB1IP1" "ADGRE5" "ARL4C" "CARD11" "CARD16" "CD52"
## [7] "CTSW" "CX3CR1" "DDX60L" "IFITM2" "ITM2A" "KDM3B"
## [13] "KIAA1551" "LAG3" "LY6E" "MT2A" "MYO1G" "RALGAPB"
## [19] "S100A11" "TRAC" "TRAV12-3" "TRAV17" "TRAV9-2" "TRBV11-2"
## [25] "TRGC2" "ZNF683"
## [1] 8
## [1] "AIF1" "CHMP1B" "NSUN6" "CASP10" "CAST" "CCDC88C" "CD46"
## [8] "CELF2" "CFLAR" "DDHD1" "DENND6A" "ITK" "KCNAB2" "KDM5A"
## [15] "KLF3" "LRBA" "MTMR6" "NCKAP1L" "PCYT1A" "PRKCH" "RASA3"
## [22] "RIC3" "SPG11" "WDR7"
## [1] 9
## [1] "ID2" "ISCA1" "MAP3K2" "MSC" "NDE1" "SPECC1" "ABHD17A"
## [8] "BHLHE40" "DOCK11" "EFHD2" "FRYL" "GSE1" "LENG8" "MACF1"
## [15] "MAP3K3" "PHF3" "PLEKHG3" "POLH" "PRR5L" "PTPN23" "SSH1"
## [22] "STK38" "WAC" "ZFYVE16"
## [1] 10
## [1] "CD27" "CPNE1" "CREBL2" "CYB561A3" "EPB41L4A-AS1"
## [6] "FCER1G" "IER3" "ITGAE" "KIR3DL2" "LRRC23"
## [11] "NCR1" "NR4A3" "NT5DC1" "SDR42E2" "SLC38A1"
## [16] "TMEM107" "TMEM204" "TMEM42" "TSPYL4" "UIMC1"
## [21] "ZFAND1" "PARP4" "TEP1" "TRIM38"
## [1] 11
## [1] "GLA" "MAT2B" "ARHGEF3" "ARID5B" "BROX" "CAPNS1" "GCN1"
## [8] "GPRIN3" "IL2RG" "INO80D" "KLF2" "KLF6" "NCOA7" "NLRC5"
## [15] "PLA2G6" "PLAC8" "SENP7" "SLFN12L" "STK17B" "TOB1" "USP16"
## [22] "XAF1" "ZBP1" "ZBTB20" "ZDHHC20"
## [1] 12
## [1] "THAP9-AS1" "AKNA" "AP005482.1" "CEMIP2" "DIAPH2"
## [6] "GPR174" "LINC02384" "MIAT" "MX2" "NBEAL2"
## [11] "OAS2" "ODF3B" "PCSK7" "SAMD9L" "TBC1D14"
## [16] "THUMPD3-AS1" "TRANK1" "TRAPPC11" "TRAPPC8" "TRAV19"
## [21] "TRAV27" "TRAV4" "TRBV2" "TRDV1" "TRGV10"
## [26] "TRGV4" "TSPAN32"
## [1] 13
## [1] "AC083798.2" "AL121944.1" "ARMH1" "ATP2B1-AS1" "BBS9"
## [6] "FAM213B" "IGKV3-20" "INPP4B" "LRRN3" "NPIPB4"
## [11] "NUP58" "PRAG1" "RAB33B" "SLC27A5" "TNFRSF25"
## [16] "TRABD2A" "TRAV12-2" "TRAV5" "TRAV8-4" "TRBV3-1"
## [21] "TRBV6-1" "TRBV6-2" "ZNF749" "ZNF862" "MTERF2"
## [26] "RUFY2" "SLCO3A1"
## [1] 14
## [1] "AC007384.1" "AC020911.2" "AC025171.3" "AC083880.1" "AC091271.1"
## [6] "AC103591.3" "AF213884.3" "AL139246.5" "AL357060.1" "AL451085.1"
## [11] "AL627171.1" "ASL" "C6orf99" "HELQ" "HIPK1-AS1"
## [16] "IFRD1" "KCNQ1OT1" "LINC01465" "MZF1-AS1" "NT5C3B"
## [21] "OSER1-DT" "PGGHG" "RGS1" "TRAV3" "TRBV7-9"
## [26] "C16orf72" "HIVEP3"
## [1] 15
## [1] "ACTR1B" "C12orf10" "EIF1" "EIF2S3" "EIF3E" "EIF3G"
## [7] "EIF3K" "EIF3L" "EIF4A2" "PAIP2" "PHLDA1" "RACK1"
## [13] "ATAD2" "DDX60" "EIF3A" "EIF4G1" "EIF4G3" "LRRFIP1"
## [19] "MSI2" "PRRC2C" "SECISBP2"
## [1] 16
## [1] "AC004687.1" "AC087239.1" "AL118516.1" "AL138963.3" "ANXA2R"
## [6] "ATP5F1A" "BTG2" "CCNI" "CMC1" "FCMR"
## [11] "GCSAM" "GTF3A" "ICAM3" "KLRF1" "MFNG"
## [16] "NBPF14" "NOP53" "PPP1R15B" "PRR7" "RTRAF"
## [21] "SNHG15" "SNHG9" "TRG-AS1" "WDR86" "WSB1"
## [26] "ZNF276"
## [1] 17
## [1] "ARL4A" "IL6R" "RGCC" "TXK" "UBL7" "ZC3H12A" "ZNF10"
## [8] "CLUH" "COX19" "ELMO1" "ETV6" "GON4L" "LONP2" "NAA25"
## [15] "NFKBIZ" "PARP14" "PARP9" "PDE4B" "PHF14" "PIGF" "SETD2"
## [22] "USP34" "WDR37" "ZNF557"
## [1] 18
## [1] "AKTIP" "GLS" "HIST1H2BK" "HIST1H2BN" "IL16" "NCF1"
## [7] "PPA1" "SSR2" "TBC1D17" "BDP1" "CPT1A" "CYBA"
## [13] "DHX29" "FGR" "NEMF" "NNT" "PKD1" "PPP1R12C"
## [19] "PTMS" "UPF2"
## [1] 19
## [1] "ASAH1" "ATP5MG" "CD7" "GATA3" "GRAMD1A" "MZT2A"
## [7] "MZT2B" "PNRC1" "PPP1R15A" "PTGER4" "VAMP7" "CD99"
## [13] "DNAJB14" "KLHDC4" "NUP210" "PTPN4" "PTPN7" "RORA"
## [19] "ST6GAL1" "TBX21" "TUBGCP6" "ZFAND3"
## [1] 20
## [1] "ARHGAP9" "C12orf45" "CXXC5" "EI24" "GIMAP1" "GPR183"
## [7] "GSTM1" "GSTM4" "LCP2" "LETMD1" "PCMTD2" "PDE7A"
## [13] "RTN3" "SESN2" "TRAT1" "ARAP2" "FAM169A" "LRRC8A"
## [19] "MCOLN2" "MICAL2" "SZT2" "VPS13A" "VPS13D"
## [1] 21
## [1] "CAMK4" "CMTM7" "EPHX2" "EPS8" "FAM102A" "HIBADH"
## [7] "LDLRAP1" "NOSIP" "RCAN3" "SELL" "STMN3" "TCEA3"
## [13] "TCF7" "TESPA1" "TRAV21" "ZNF575" "GPANK1" "HERC3"
## [19] "HERC6" "ITPR2" "MAPK8IP3" "SCRN3" "SOS1" "VCAN"
## [25] "ZNF493"
## [1] 22
## [1] "AMD1" "C12orf57" "CD84" "DTHD1" "FAM118A" "GLTP"
## [7] "GZMK" "KLRG1" "NOCT" "PIK3IP1" "RCSD1" "RGL4"
## [13] "RSRP1" "SH2D1A" "SLC38A2" "STK17A" "TOX" "TRBC1"
## [19] "TRGV5" "WARS2" "CCDC112" "ERAP2" "SLK" "VPS13B"
## [25] "YPEL1"
## [1] 23
## [1] "ALOX5AP" "APMAP" "HDHD3" "LAPTM5" "LBH" "LIME1" "MATK"
## [8] "PTPRCAP" "PTRHD1" "RHOC" "THEM4" "TMEM134" "CBX7" "CST7"
## [15] "FGFBP2" "GZMA" "GZMB" "NKG7" "PHC3" "PLEK" "SPON2"
## [22] "SRGN" "UBE4A" "UCP2"
## [1] 24
## [1] "NT5E" "SNRPN" "ZEB1" "AFF1" "AFF4" "ARID1A" "ARID1B"
## [8] "BAZ2A" "CHD1" "CRNKL1" "DGKD" "NKTR" "PIP4K2B" "PNN"
## [15] "PPIG" "RNF157" "RNMT" "RNPC3" "SCAF8" "SMG1" "USP42"
## [22] "WDR60"
## [1] 25
## [1] "BTG1" "FOXP1" "KLHDC2" "PPP1R10" "PRPF38B" "SAT1"
## [7] "SATB1" "ZBTB10" "ARHGAP4" "ASH1L" "FAM160B1" "HERC1"
## [13] "MALAT1" "MORC3" "NEAT1" "RUNX3" "SLTM" "TCF25"
## [19] "TFDP2" "TTF2" "ZC3H7B" "ZEB2" "ZNF335"
## [1] 26
## [1] "AC004854.2" "AC015982.1" "AC016405.3" "AC027644.3" "AOAH"
## [6] "ARF4-AS1" "BX284668.6" "CRTAM" "CSKMT" "CXorf40A"
## [11] "IER5" "ILF3-DT" "KLF10" "KMT2E-AS1" "MAPRE2"
## [16] "MTRNR2L8" "PAPSS1" "SLC4A4" "TRAV38-2DV8" "TRBV6-5"
## [21] "TRBV7-2" "TRGV9" "Z93241.1" "GALNT3" "RLF"
## [1] 27
## [1] "HMGCS1" "RELT" "TPRKB" "CAPN15" "DNAJC13" "EPSTI1"
## [7] "GPATCH2L" "HSH2D" "IFI44" "IFI44L" "IGHA1" "IGKC"
## [13] "MBD5" "MX1" "PHF11" "PPM1K" "PRKX" "PTPRJ"
## [19] "RNF213" "S100A8" "SPOCK2" "STK10" "TTC14"
## [1] 28
## [1] "LPXN" "C2CD3" "CEP164" "CEP350" "CTDSPL2" "DENND4C" "ENTPD4"
## [8] "GIGYF1" "HELZ2" "HIPK3" "IFIT2" "IFIT3" "KIF13B" "LIMD1"
## [15] "MAN2C1" "N4BP1" "PHACTR4" "PSTPIP2" "REXO1" "RIF1" "SMCHD1"
## [22] "ZCCHC2" "ZNF292" "ZNF800"
## [1] 29
## [1] "CST3" "CYB5D2" "GALNT11" "INTS8" "PDE3B" "SAE1"
## [7] "ADAM10" "AHCTF1" "ASCL2" "CHST12" "CTSC" "DOPEY2"
## [13] "FAR1" "GALNT2" "HLA-DQA1" "HLA-DRB1" "KLRD1" "LILRB1"
## [19] "LPCAT1" "MPPE1" "MYO9B" "PDE12" "PNPLA6" "PRF1"
## [1] 30
## [1] "ABCC10" "AC092683.1" "AC116407.2" "ADCY7" "AP3M2"
## [6] "CCDC84" "CREBZF" "ELMOD3" "GPR132" "GRK2"
## [11] "IGKV3-15" "LINC02256" "MIGA1" "PCNX1" "RUBCN"
## [16] "SLF2" "SPATA13" "TENT5C" "TRBV4-2" "TRBV7-6"
## [21] "TTTY15" "UTY" "Z93930.2" "ZNF652" "ZNF808"
## [1] 31
## [1] "COMMD6" "COTL1" "RPL41" "TOMM7" "TPGS2" "TRAF3IP2"
## [7] "YPEL3" "ARHGAP35" "B2M" "CD8A" "CD8B" "FNDC3B"
## [13] "GCA" "HEXDC" "KIAA0232" "LSS" "MKL1" "OAS3"
## [19] "POLG" "PRSS23" "SBK1" "TAF1D" "TMSB10" "TMSB4X"
## [1] 32
## [1] "CD28" "TLE4" "ARAP1" "C5orf24" "CCL4" "CES1"
## [7] "CROCC" "ETNK1" "GNLY" "GNPTAB" "GPR141" "INPP5D"
## [13] "KIAA2026" "MKLN1" "NARFL" "PDZD4" "PEX1" "PEX26"
## [19] "SEMA4D" "ST8SIA4" "SUSD1" "SYNE1" "UBR2" "WDTC1"
## [25] "YPEL5"
## [1] 33
## [1] "FCRL3" "ITGB1BP1" "ANKRD12" "APOL6" "BTN3A1" "CCNH"
## [7] "CD38" "EML4" "ERICH1" "FAM13B" "ITGAL" "PREX1"
## [13] "PYROXD1" "RHOH" "SLA2" "SLC35F5" "SLFN5" "STAT4"
## [19] "SYNRG" "TAOK1" "TAOK3" "TIPARP" "TRBC2" "UNC13D"
## [25] "ZAP70"
## [1] 34
## [1] "AC025164.1" "AL135791.1" "ALKBH7" "BEX4" "BNIP3L"
## [6] "C6orf48" "CD40LG" "COA1" "HIST1H3H" "JAML"
## [11] "LINC02273" "MCUB" "MYLIP" "NR1D1" "SESN1"
## [16] "SNHG12" "TMIGD2" "TRAV1-2" "TRAV13-1" "TRBV20-1"
## [21] "CISH" "CRYBG1" "R3HCC1L" "RFWD3" "SETX"
## [1] 35
## [1] "AC007952.4" "AC245014.3" "CITED2" "COQ7" "CSRNP1"
## [6] "DYRK4" "EFCAB2" "FAM173A" "GADD45B" "GLRX5"
## [11] "IER2" "KCTD7" "MPST" "MZF1" "NELL2"
## [16] "NR4A2" "TCP11L2" "TGIF1" "ZFAS1" "ZFP36L1"
## [21] "LTBP4" "PRR14L" "SETD5" "ZBTB40"
## [1] 36
## [1] "C10orf91" "AC020659.1" "ANKRD36B" "ANKRD36C" "BICRAL"
## [6] "CHD9" "COL6A2" "COL6A3" "CPPED1" "DENND4B"
## [11] "EHBP1L1" "IQCG" "LAIR2" "MINDY2" "NLRC3"
## [16] "RAB27B" "RNF19A" "SLC16A1-AS1" "SYTL3" "TRDC"
## [21] "TTC16" "VPS13C" "VTI1A" "XCL1" "ZNF708"
## [1] 37
## [1] "BACH2" "EGR1" "FOSB" "HIST1H4C" "JUNB" "MAF1"
## [7] "NUCB2" "RGS10" "RPL22L1" "SEPT6" "SNHG8" "TSR3"
## [13] "ACAP3" "ARRDC3" "CYTH1" "FOSL2" "JUND" "L3HYPDH"
## [19] "MAF" "MIDN" "POLR3D"
## [1] 38
## [1] "CD83" "COG5" "HMBOX1" "IKZF2" "KIF9" "NXT2"
## [7] "ANKRD36" "ANKRD49" "BCL9L" "CD226" "COG7" "FNBP1"
## [13] "INPP4A" "IRAK4" "PPP1R16B" "RAP1GAP2" "SPN" "SSBP3"
## [19] "THADA" "UVRAG" "VPS18" "VPS39" "XPO6"
## [1] 39
## [1] "AL136454.1" "SERINC5" "ABCA5" "ADHFE1" "CHD6"
## [6] "DDIT4" "DDX3Y" "DUS1L" "EIF1AY" "ENOSF1"
## [11] "FGL2" "GBP5" "HECTD4" "ISG20" "KDM5D"
## [16] "KIAA1109" "LPIN1" "OSM" "PARP11" "RPS4Y1"
## [21] "SBNO2" "SUSD6" "TARSL2" "ZMIZ2" "ZNF236"
## [1] 40
## [1] "CXorf57" "ID1" "PLK2" "SBF2" "ABR" "ANKRD44" "EHD4"
## [8] "EIF4E3" "FAM53B" "H6PD" "MYOM2" "NEK9" "NRDE2" "PPIL2"
## [15] "PRDM2" "PTGDS" "RBSN" "RREB1" "SBF1" "TBC1D2B" "TMX3"
## [22] "UQCC2" "ZNF407" "ZNF827"
# Print every gene set that has at least one stored result: its label, its
# member genes, and the per-collection result tables.
for(set_label in names(goseq_res)){
  # Labels look like "set_<k>"; recover k to look up the genes of that set.
  set_index = as.numeric(sub("set_", "", set_label, fixed = TRUE))
  print(set_label)
  print(gene_sets[[set_index]])
  print(goseq_res[[set_label]])
}
## [1] "set_1"
## [1] "AC008555.5" "AC012645.3" "AC044849.1" "AC087623.3" "AC119396.1"
## [6] "AC245297.3" "ARRDC2" "CCL4L2" "CLDND1" "CRLF3"
## [11] "EOMES" "HIKESHI" "INTS6L" "LINC00649" "PITPNC1"
## [16] "TRAV8-3" "TRGV7" "TRGV8" "ADGRG1" "ARHGAP30"
## [21] "CYTOR" "FCRL6" "NECAP1" "TTC38" "XCL2"
## $go_bp
## category over_represented_pvalue
## 943 forebrain regionalization 2.99102e-05
## 4491 telencephalon regionalization 2.99102e-05
## under_represented_pvalue numDEInCat numInCat FDR
## 943 1 2 2 0.07015437
## 4491 1 2 2 0.07015437
##
## [1] "set_15"
## [1] "ACTR1B" "C12orf10" "EIF1" "EIF2S3" "EIF3E" "EIF3G"
## [7] "EIF3K" "EIF3L" "EIF4A2" "PAIP2" "PHLDA1" "RACK1"
## [13] "ATAD2" "DDX60" "EIF3A" "EIF4G1" "EIF4G3" "LRRFIP1"
## [19] "MSI2" "PRRC2C" "SECISBP2"
## $reactome
## category
## 26 activation of the mrna upon binding of the cap binding complex and eifs and subse
## 296 eukaryotic translation initiation
## 1116 translation
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 26 6.132043e-08 1.0000000 8 45
## 296 2.043390e-05 0.9999983 8 94
## 1116 5.254185e-05 0.9999951 8 106
## FDR
## 26 7.131566e-05
## 296 1.188231e-02
## 1116 2.036872e-02
##
## $go_bp
## category
## 608 cytoplasmic translational initiation
## 4559 translational initiation
## 3983 regulation of translational initiation
## 946 formation of cytoplasmic translation initiation complex
## 607 cytoplasmic translation
## 3103 protein rna complex organization
## 4672 viral translation
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 608 0.000000e+00 1.0000000 8 16
## 4559 0.000000e+00 1.0000000 11 35
## 3983 2.968284e-09 1.0000000 7 22
## 946 2.266802e-08 1.0000000 5 9
## 607 4.539713e-07 1.0000000 9 97
## 3103 2.471536e-05 0.9999986 6 52
## 4672 3.715317e-05 0.9999997 3 6
## FDR
## 608 0.000000e+00
## 4559 0.000000e+00
## 3983 4.641407e-06
## 946 2.658392e-05
## 607 4.259159e-04
## 3103 1.932329e-02
## 4672 2.489793e-02
##
## [1] "set_17"
## [1] "ARL4A" "IL6R" "RGCC" "TXK" "UBL7" "ZC3H12A" "ZNF10"
## [8] "CLUH" "COX19" "ELMO1" "ETV6" "GON4L" "LONP2" "NAA25"
## [15] "NFKBIZ" "PARP14" "PARP9" "PDE4B" "PHF14" "PIGF" "SETD2"
## [22] "USP34" "WDR37" "ZNF557"
## $go_bp
## category over_represented_pvalue
## 3864 regulation of response to type ii interferon 1.212542e-05
## under_represented_pvalue numDEInCat numInCat FDR
## 3864 1 3 4 0.05688035
##
## [1] "set_21"
## [1] "CAMK4" "CMTM7" "EPHX2" "EPS8" "FAM102A" "HIBADH"
## [7] "LDLRAP1" "NOSIP" "RCAN3" "SELL" "STMN3" "TCEA3"
## [13] "TCF7" "TESPA1" "TRAV21" "ZNF575" "GPANK1" "HERC3"
## [19] "HERC6" "ITPR2" "MAPK8IP3" "SCRN3" "SOS1" "VCAN"
## [25] "ZNF493"
## $immune
## category over_represented_pvalue
## 4253 gse45739 unstim vs acd3 acd28 stim wt cd4 tcell up 1.477794e-05
## 1959 gse22886 naive cd4 tcell vs monocyte up 4.427647e-05
## 120 gse10325 lupus cd4 tcell vs lupus bcell up 5.367498e-05
## under_represented_pvalue numDEInCat numInCat FDR
## 4253 0.9999992 6 38 0.07533793
## 1959 0.9999961 7 67 0.09121169
## 120 0.9999952 7 69 0.09121169
##
## [1] "set_23"
## [1] "ALOX5AP" "APMAP" "HDHD3" "LAPTM5" "LBH" "LIME1" "MATK"
## [8] "PTPRCAP" "PTRHD1" "RHOC" "THEM4" "TMEM134" "CBX7" "CST7"
## [15] "FGFBP2" "GZMA" "GZMB" "NKG7" "PHC3" "PLEK" "SPON2"
## [22] "SRGN" "UBE4A" "UCP2"
## $immune
## category
## 4250 gse45739 unstim vs acd3 acd28 stim nras ko cd4 tcell dn
## 2392 gse26495 naive vs pd1low cd8 tcell dn
## 1967 gse22886 naive cd8 tcell vs memory tcell up
## 2281 gse25123 ctrl vs il4 stim macrophage up
## 4252 gse45739 unstim vs acd3 acd28 stim wt cd4 tcell dn
## 1911 gse22886 cd8 tcell vs bcell naive up
## 1620 gse21063 wt vs nfatc1 ko 16h anti igm stim bcell dn
## 2801 gse29618 pdc vs mdc day7 flu vaccine up
## 4952 hoft pbmc tice bcg rbcg ag85a ag85b age 18 40yo correlated with whole blood bacte
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4250 1.270944e-06 0.9999999 8 54
## 2392 1.042593e-05 0.9999991 8 77
## 1967 1.494111e-05 0.9999992 6 38
## 2281 2.091328e-05 0.9999988 6 41
## 4252 6.892515e-05 0.9999935 7 70
## 1911 7.560678e-05 0.9999928 7 68
## 1620 1.264718e-04 0.9999953 4 17
## 2801 1.309919e-04 0.9999919 5 35
## 4952 1.719437e-04 0.9999930 4 19
## FDR
## 4250 0.006479275
## 2392 0.025389924
## 1967 0.025389924
## 2281 0.026653979
## 4252 0.064240557
## 1911 0.064240557
## 1620 0.083474564
## 2801 0.083474564
## 4952 0.097396579
##
## [1] "set_27"
## [1] "HMGCS1" "RELT" "TPRKB" "CAPN15" "DNAJC13" "EPSTI1"
## [7] "GPATCH2L" "HSH2D" "IFI44" "IFI44L" "IGHA1" "IGKC"
## [13] "MBD5" "MX1" "PHF11" "PPM1K" "PRKX" "PTPRJ"
## [19] "RNF213" "S100A8" "SPOCK2" "STK10" "TTC14"
## $immune
## category
## 3047 gse33424 cd161 int vs neg cd8 tcell up
## 1336 gse18791 ctrl vs newcastle virus dc 4h dn
## 12 erwin cohen blood vaccine tc 83 age 23 48yo vaccinated vs control 7dy up
## 1290 gse17974 il4 and anti il12 vs untreated 48h act cd4 tcell dn
## 1701 gse21546 wt vs sap1a ko dp thymocytes up
## 2409 gse26890 cxcr1 neg vs pos effector cd8 tcell up
## 1340 gse18791 ctrl vs newcastle virus dc 8h dn
## 1344 gse18791 unstim vs newcatsle virus dc 18h dn
## 2345 gse26030 th1 vs th17 day5 post polarization up
## 375 gse13485 ctrl vs day3 yf17d vaccine pbmc dn
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 3047 4.907423e-07 1.0000000 8 57
## 1336 4.426198e-06 0.9999997 7 53
## 12 5.785920e-06 0.9999997 6 36
## 1290 7.987357e-06 0.9999996 6 36
## 1701 8.168824e-06 0.9999995 7 59
## 2409 8.668795e-06 0.9999994 7 59
## 1340 1.043667e-05 0.9999993 7 60
## 1344 2.281256e-05 0.9999986 6 45
## 2345 2.470588e-05 0.9999985 6 47
## 375 2.534133e-05 0.9999985 6 46
## FDR
## 3047 0.002501804
## 1336 0.007365586
## 12 0.007365586
## 1290 0.007365586
## 1701 0.007365586
## 2409 0.007365586
## 1340 0.007600881
## 1344 0.012919012
## 2345 0.012919012
## 375 0.012919012
##
## [1] "set_29"
## [1] "CST3" "CYB5D2" "GALNT11" "INTS8" "PDE3B" "SAE1"
## [7] "ADAM10" "AHCTF1" "ASCL2" "CHST12" "CTSC" "DOPEY2"
## [13] "FAR1" "GALNT2" "HLA-DQA1" "HLA-DRB1" "KLRD1" "LILRB1"
## [19] "LPCAT1" "MPPE1" "MYO9B" "PDE12" "PNPLA6" "PRF1"
## $go_bp
## category over_represented_pvalue
## 4472 t cell mediated cytotoxicity 1.367049e-06
## under_represented_pvalue numDEInCat numInCat FDR
## 4472 1 5 14 0.006412827
##
## [1] "set_36"
## [1] "C10orf91" "AC020659.1" "ANKRD36B" "ANKRD36C" "BICRAL"
## [6] "CHD9" "COL6A2" "COL6A3" "CPPED1" "DENND4B"
## [11] "EHBP1L1" "IQCG" "LAIR2" "MINDY2" "NLRC3"
## [16] "RAB27B" "RNF19A" "SLC16A1-AS1" "SYTL3" "TRDC"
## [21] "TTC16" "VPS13C" "VTI1A" "XCL1" "ZNF708"
## $reactome
## category
## 159 collagen biosynthesis and modifying enzymes
## 160 collagen chain trimerization
## 72 assembly of collagen fibrils and other multimeric structures
## 162 collagen formation
## 590 ncam1 interactions
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 159 9.878759e-05 1.0000000 2 2
## 160 9.878759e-05 1.0000000 2 2
## 72 2.897978e-04 0.9999992 2 3
## 162 2.897978e-04 0.9999992 2 3
## 590 2.946780e-04 0.9999992 2 3
## FDR
## 159 0.05744498
## 160 0.05744498
## 72 0.06854210
## 162 0.06854210
## 590 0.06854210
##
## [1] "set_37"
## [1] "BACH2" "EGR1" "FOSB" "HIST1H4C" "JUNB" "MAF1"
## [7] "NUCB2" "RGS10" "RPL22L1" "SEPT6" "SNHG8" "TSR3"
## [13] "ACAP3" "ARRDC3" "CYTH1" "FOSL2" "JUND" "L3HYPDH"
## [19] "MAF" "MIDN" "POLR3D"
## $reactome
## category
## 617 ngf stimulated transcription
## 640 nuclear events kinase and transcription factor activation
## 971 signaling by ntrks
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 617 3.003971e-06 1.0000000 4 11
## 640 8.960668e-06 0.9999999 4 14
## 971 4.757959e-05 0.9999988 4 21
## FDR
## 617 0.003493619
## 640 0.005210628
## 971 0.018445022
##
## [1] "set_38"
## [1] "CD83" "COG5" "HMBOX1" "IKZF2" "KIF9" "NXT2"
## [7] "ANKRD36" "ANKRD49" "BCL9L" "CD226" "COG7" "FNBP1"
## [13] "INPP4A" "IRAK4" "PPP1R16B" "RAP1GAP2" "SPN" "SSBP3"
## [19] "THADA" "UVRAG" "VPS18" "VPS39" "XPO6"
## $reactome
## category over_represented_pvalue
## 895 sars cov 2 modulates autophagy 1.914726e-06
## under_represented_pvalue numDEInCat numInCat FDR
## 895 1 3 3 0.002226826
##
## $go_bp
## category over_represented_pvalue under_represented_pvalue
## 4354 snare complex assembly 2.345865e-06 1
## numDEInCat numInCat FDR
## 4354 3 3 0.01100445
##
## [1] "set_39"
## [1] "AL136454.1" "SERINC5" "ABCA5" "ADHFE1" "CHD6"
## [6] "DDIT4" "DDX3Y" "DUS1L" "EIF1AY" "ENOSF1"
## [11] "FGL2" "GBP5" "HECTD4" "ISG20" "KDM5D"
## [16] "KIAA1109" "LPIN1" "OSM" "PARP11" "RPS4Y1"
## [21] "SBNO2" "SUSD6" "TARSL2" "ZMIZ2" "ZNF236"
## $immune
## category
## 5089 van den biggelaar pbmc prevnar 9mo infant stimulated vs unstimulated 8mo up
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 5089 1.504061e-05 0.9999999 3 4
## FDR
## 5089 0.07667701
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 8958358 478.5 16391124 875.4 NA 16391124 875.4
## Vcells 19169814 146.3 59968765 457.6 65536 91658037 699.3
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
## [2] GenomicFeatures_1.50.4
## [3] GenomicRanges_1.50.2
## [4] GenomeInfoDb_1.34.9
## [5] org.Hs.eg.db_3.16.0
## [6] AnnotationDbi_1.60.2
## [7] IRanges_2.32.0
## [8] S4Vectors_0.36.2
## [9] Biobase_2.58.0
## [10] BiocGenerics_0.44.0
## [11] goseq_1.50.0
## [12] geneLenDataBase_1.34.0
## [13] BiasedUrn_2.0.10
## [14] fgsea_1.24.0
## [15] biomaRt_2.54.1
## [16] limma_3.54.2
## [17] tidyr_1.3.0
## [18] ggpubr_0.6.0
## [19] ggplot2_3.4.2
## [20] data.table_1.14.8
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-162 matrixStats_1.0.0
## [3] bitops_1.0-7 bit64_4.0.5
## [5] filelock_1.0.2 progress_1.2.2
## [7] httr_1.4.6 tools_4.2.3
## [9] backports_1.4.1 bslib_0.4.2
## [11] utf8_1.2.3 R6_2.5.1
## [13] mgcv_1.8-42 DBI_1.1.3
## [15] colorspace_2.1-0 withr_2.5.0
## [17] tidyselect_1.2.0 prettyunits_1.1.1
## [19] bit_4.0.5 curl_5.0.1
## [21] compiler_4.2.3 cli_3.6.1
## [23] xml2_1.3.4 DelayedArray_0.24.0
## [25] rtracklayer_1.58.0 sass_0.4.5
## [27] scales_1.2.1 rappdirs_0.3.3
## [29] Rsamtools_2.14.0 stringr_1.5.0
## [31] digest_0.6.31 rmarkdown_2.21
## [33] XVector_0.38.0 pkgconfig_2.0.3
## [35] htmltools_0.5.5 MatrixGenerics_1.10.0
## [37] dbplyr_2.3.2 fastmap_1.1.1
## [39] rlang_1.1.0 rstudioapi_0.14
## [41] RSQLite_2.3.1 BiocIO_1.8.0
## [43] jquerylib_0.1.4 generics_0.1.3
## [45] jsonlite_1.8.4 BiocParallel_1.32.6
## [47] dplyr_1.1.2 car_3.1-2
## [49] RCurl_1.98-1.12 magrittr_2.0.3
## [51] GO.db_3.16.0 GenomeInfoDbData_1.2.9
## [53] Matrix_1.6-4 Rcpp_1.0.10
## [55] munsell_0.5.0 fansi_1.0.4
## [57] abind_1.4-5 lifecycle_1.0.3
## [59] stringi_1.7.12 yaml_2.3.7
## [61] carData_3.0-5 SummarizedExperiment_1.28.0
## [63] zlibbioc_1.44.0 BiocFileCache_2.6.1
## [65] grid_4.2.3 blob_1.2.4
## [67] parallel_4.2.3 crayon_1.5.2
## [69] lattice_0.20-45 splines_4.2.3
## [71] Biostrings_2.66.0 cowplot_1.1.1
## [73] hms_1.1.3 KEGGREST_1.38.0
## [75] knitr_1.44 pillar_1.9.0
## [77] rjson_0.2.21 ggsignif_0.6.4
## [79] codetools_0.2-19 fastmatch_1.1-3
## [81] XML_3.99-0.14 glue_1.6.2
## [83] evaluate_0.20 png_0.1-8
## [85] vctrs_0.6.2 gtable_0.3.3
## [87] purrr_1.0.1 cachem_1.0.7
## [89] xfun_0.39 broom_1.0.4
## [91] restfulr_0.0.15 rstatix_0.7.2
## [93] tibble_3.2.1 GenomicAlignments_1.34.1
## [95] memoise_2.0.1